/*
  author Sylvain Bertrand <sylvain.bertrand@gmail.com>
  Protected by linux GNU GPLv2
  Copyright 2012-2014
*/
#include <linux/module.h>
#include <linux/device.h>
#include <linux/i2c.h>
#include <linux/delay.h>
#include <linux/wait.h>

#include <uapi/alga/pixel_fmts.h>
#include <alga/timing.h>
#include <uapi/alga/amd/dce6/dce6.h>
#include <alga/amd/dce6/dce6_dev.h>
#include <alga/amd/atombios/atb.h>
#include <alga/amd/atombios/dce.h>
#include <uapi/alga/amd/si/ioctl.h>

#include "dce6.h"
#include "sink.h"
#include "regs.h"

#define WR32(val, of) dce->ddev.wr32(dce->ddev.dev, (val), (of))
#define RR32(of) dce->ddev.rr32(dce->ddev.dev, (of))
/*
 * Program the graphics surface (framebuffer) of crtc i from db_fb:
 * pixel format, surface addresses, pitch, viewport, and flip behavior.
 * Returns 0 on success, -DCE6_ERR on unsupported pixel format.
 */
long crtc_fb(struct dce6 *dce, u8 i, struct sink_db_fb *db_fb)
{
	u32 fb_fmt;
	u32 fb_swap;
	u32 viewport_h;
	u32 viewport_v;
	u32 grph_flip_ctl;

	fb_swap = set(GSC_ENDIAN_SWAP, GSC_ENDIAN_NONE);
	switch (db_fb->pixel_fmt) {
	case ALGA_ARGB6666:
	case ALGA_ARGB8888:
		/* NOTE(review): ARGB6666 is programmed as ARGB8888 (32bpp) */
		fb_fmt = (set(GC_DEPTH, GC_DEPTH_32BPP)
						| set(GC_FMT, GC_FMT_ARGB8888));
		break;
	case ALGA_ARGB2101010:
		fb_fmt = (set(GC_DEPTH, GC_DEPTH_32BPP)
					| set(GC_FMT, GC_FMT_ARGB2101010));
		break;
	default:
		dev_err(dce->ddev.dev, "unsupported pixel format %d\n",
							db_fb->pixel_fmt);
		return -DCE6_ERR;
	}

	WR32(upper_32_bits(db_fb->primary),
					regs_grph_primary_surf_addr_high[i]);
	WR32(upper_32_bits(db_fb->secondary),
				regs_grph_secondary_surf_addr_high[i]);
	WR32((u32)(db_fb->primary) & GPSA_SURF_ADDR_MASK,
					regs_grph_primary_surf_addr[i]);
	WR32((u32)(db_fb->secondary) & GSSA_SURF_ADDR_MASK,
					regs_grph_secondary_surf_addr[i]);

	/*
	 * store the surface gpu addresses for page flipping without reading
	 * the content of the regs
	 */
	dce->dps[i].pf.crtc_surfs.primary = db_fb->primary;
	dce->dps[i].pf.crtc_surfs.secondary = db_fb->secondary;

	WR32(fb_fmt, regs_grph_ctl[i]);
	WR32(fb_swap, regs_grph_swap_ctl[i]);
	WR32(0, regs_grph_surf_of_x[i]);
	WR32(0, regs_grph_surf_of_y[i]);
	/* pixels */
	WR32(0, regs_grph_x_start[i]);
	WR32(0, regs_grph_y_start[i]);
	WR32(db_fb->timing->h, regs_grph_x_end[i]);
	WR32(db_fb->timing->v, regs_grph_y_end[i]);
	/* if the pitch is set to 0, use the horizontal pixels */
	if (db_fb->pitch == 0)
		WR32(db_fb->timing->h, regs_grph_pitch[i]);
	else
		WR32(db_fb->pitch, regs_grph_pitch[i]);
	WR32(1, regs_grph_ena[i]);
	WR32(db_fb->timing->v, regs_desktop_height[i]);
	WR32((0 << 16) | 0, regs_viewport_start[i]);
	viewport_h = db_fb->timing->h;
	/* round the vertical size up to the next even value */
	viewport_v = (db_fb->timing->v + 1) & ~1;
	WR32((viewport_h << 16) | viewport_v, regs_viewport_sz[i]);

	/* XXX: obsolete pageflip setup */
	/* make sure flip is at vb rather than hb */
	grph_flip_ctl = RR32(regs_grph_flip_ctl[i]);
	grph_flip_ctl &= ~GFC_SURF_UPDATE_H_RETRACE_ENA;
	WR32(grph_flip_ctl, regs_grph_flip_ctl[i]);

	/* set pageflip to happen anywhere in vblank interval */
	WR32(0, regs_master_update_mode[i]);
	return 0;
}

/*
 * Program the color pipeline of crtc i: bypass the color space
 * conversion/prescale/gamma blocks and load a hardcoded linear
 * 256-entry 10-bit lut.
 */
void crtc_lut(struct dce6 *dce, u8 i)
{
	u16 j;
	u16 v;

	/* bypass the input color space conversion and prescale blocks */
	WR32(set(ICC_MODE_GRPH, ICC_BYPASS) | set(ICC_MODE_OVL, ICC_BYPASS),
							regs_input_csc_ctl[i]);
	WR32(GPC_PRESCALE_BYPASS, regs_grph_prescale_ctl[i]);
	WR32(OPC_PRESCALE_BYPASS, regs_ovl_prescale_ctl[i]);
	/* TODO: nowadays use of gamma??? Should go bypass */
	WR32(set(IGC_MODE_GRPH, IGC_USE_LUT) | set(IGC_MODE_OVL, IGC_USE_LUT),
						regs_input_gamma_ctl[i]);
	WR32(0, regs_lut_ctl[i]);

	/* black point at 0 on all three channels */
	WR32(0, regs_lut_black_of_blue[i]);
	WR32(0, regs_lut_black_of_green[i]);
	WR32(0, regs_lut_black_of_red[i]);

	/* white point at full scale on all three channels */
	WR32(0xffff, regs_lut_white_of_blue[i]);
	WR32(0xffff, regs_lut_white_of_green[i]);
	WR32(0xffff, regs_lut_white_of_red[i]);

	WR32(0, regs_lut_rw_mode[i]);
	WR32(0x00000007, regs_lut_write_ena_mask[i]);

	WR32(0, regs_lut_rw_idx[i]);

	/*
	 * XXX: gamma hardcoded, 256 linear values of 10 bits, range 0x400
	 * (1024)
	 */
	/*
	 * NOTE(review): all 256 writes target the same data reg; presumably
	 * the rw index auto-increments on each write -- TODO confirm
	 */
	for (v = 0, j = 0; j < 256; ++j, v+=4)
		WR32((v << 20) | (v << 10) | (v << 0), regs_lut_30_color[i]);

	/* bypass the remaining conversion blocks downstream of the lut */
	WR32(set(DC_MODE_GRPH, DC_BYPASS) | set(DC_MODE_OVL, DC_BYPASS)
				| set(DC_MODE_ICON, DC_BYPASS)
				| set(DC_MODE_CURSOR, DC_BYPASS),
							regs_degamma_ctl[i]);
	WR32(set(GRC_MODE_GRPH, GRC_BYPASS) | set(GRC_MODE_OVL, GRC_BYPASS),
							regs_gamut_ctl[i]);
	WR32(set(RC_MODE_GRPH, RC_BYPASS) | set(RC_MODE_OVL, RC_BYPASS),
							regs_regamma_ctl[i]);
	WR32(set(OCC_MODE_GRPH, OCC_BYPASS) | set(OCC_MODE_OVL, OCC_BYPASS),
							regs_output_csc_ctl[i]);
	/* TODO: should match the depth of the crtc fmt block, modesetting ? */
	WR32(0, regs_gamma_unknown[i]);
}

/* mask all crtc interrupt sources and the dac autodetect interrupt */
void crtcs_intr_reset(struct dce6 *dce)
{
	u8 crtc;

	/* first pass: mask the per-crtc interrupts */
	for (crtc = 0; crtc < dce->ddev.crtcs_n; ++crtc)
		WR32(0, regs_crtc_int_mask[crtc]);

	/* second pass: clear the per-crtc graphics interrupt control */
	for (crtc = 0; crtc < dce->ddev.crtcs_n; ++crtc)
		WR32(0, regs_crtc_grph_int_ctl[crtc]);

	WR32(0, DAC_AUTODETECT_INT_CTL);
}

/*
 * Shut down every crtc: blank, disable, turn off the graphics surface,
 * then power gate the crtc pairs via atombios.
 * Returns 0 on success, -DCE6_ERR on any atombios failure.
 */
long dce6_crtcs_shutdown(struct dce6 *dce)
{
	u8 crtc;
	long ret;

	/* blank all crtcs */
	for (crtc = 0; crtc < dce->ddev.crtcs_n; ++crtc) {
		ret = atb_crtc_blank(dce->ddev.atb, crtc, 1);
		if (ret != 0)
			return -DCE6_ERR;
	}

	/* disable all crtcs */
	for (crtc = 0; crtc < dce->ddev.crtcs_n; ++crtc) {
		ret = atb_crtc(dce->ddev.atb, crtc, 0);
		if (ret != 0)
			return -DCE6_ERR;
	}

	/* disable the graphics surfaces */
	for (crtc = 0; crtc < dce->ddev.crtcs_n; ++crtc)
		WR32(0, regs_grph_ena[crtc]);

	/* power gate the crtc pairs */
	for (crtc = 0; crtc < dce->ddev.crtcs_n; ++crtc) {
		ret = atb_crtc_pair_pwr_gate(dce->ddev.atb, crtc, 1);
		if (ret != 0)
			return -DCE6_ERR;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(dce6_crtcs_shutdown);

/*
 * Arm a page flip on crtc i: swap the cached primary/secondary surface
 * addresses and latch them into the grph surface address regs while
 * holding the update lock, then wait for the hardware to acknowledge a
 * pending surface update.
 * Returns 0 on success, -DCE6_ERR if the surface update pending bit did
 * not rise within USEC_TIMEOUT microseconds.
 */
static long page_flip(struct dce6 *dce, u8 i)
{
	u32 grph_update;
	u64 swap;
	struct crtc_surfs *css;
	u32 j;
	long r;

	r = 0;

	css = &dce->dps[i].pf.crtc_surfs;

	/* swap the cached copies, the regs are never read back */
	swap = css->secondary;
	css->secondary = css->primary;
	css->primary = swap;

	/* lock the double buffered regs while we update all four of them */
	grph_update = RR32(regs_grph_update[i]);
	grph_update |= GU_UPDATE_LOCK;
	WR32(grph_update, regs_grph_update[i]);

	WR32(upper_32_bits(css->primary), regs_grph_primary_surf_addr_high[i]);
	WR32(upper_32_bits(css->secondary),
					regs_grph_secondary_surf_addr_high[i]);
	WR32((u32)(css->primary) & GPSA_SURF_ADDR_MASK,
						regs_grph_primary_surf_addr[i]);
	WR32((u32)(css->secondary) & GSSA_SURF_ADDR_MASK,
					regs_grph_secondary_surf_addr[i]);

	/* wait for the notification to go high */
	for (j = 0; j < USEC_TIMEOUT; ++j) {
		if (RR32(regs_grph_update[i]) & GU_SURF_UPDATE_PENDING)
			break;
		udelay(1);
	}
	if (j == USEC_TIMEOUT) {
		dev_err(dce->ddev.dev, "dce6: page flip on %u timed out\n", i);
		r = -DCE6_ERR;
	}

	/* release the update lock even on timeout */
	grph_update &= ~GU_UPDATE_LOCK;
	WR32(grph_update, regs_grph_update[i]);
	return r;
}

/*
 * this is executed in hard irq context because we want the page flip to happen
 * as soon as possible
 */
/*
 * Vblank (page flip completion) handler for crtc i, runs in hard irq
 * context: mask the vblank interrupt again, call the registered notify
 * callback with the current frame count, then clear the registration.
 */
void dce6_pf_irq(struct dce6 *dce, u8 i, struct timespec monotonic_raw_tp)
{
	u32 crtc_int_mask;
	u32 vblank_n;

	vblank_n = RR32(regs_crtc_status_frame_cnt[i]);

	/*
	 * XXX: we can be in the vblank handler only to notify a page flip
	 * completion.
	 * There are 2 things to keep in mind:
	 *   - the vblank interrupt can happen a bit *before* the real vblank.
	 *     (was told it's an hardware bug).
	 *   - the page flip will be done in hardware during the vblank, you
	 *     don't know when (insanity since they have the specs).
	 * It is supposed *not* to matter since, in vblank, the pixel of the
	 * visible frame buffer are supposed to be sent already to the display
	 * device.
	 * Meaning, starting to draw the next frame into the "maybe" not yet
	 * flipped visible framebuffer should have very little chance/no chance to
	 * glitch what's actually on screen.
	 * If it does glitch for whatever reason in some cases, will
	 * wait for the update pending bit to go low, hard way.
	 */

	/* page flip is done */

	/* we don't need the vblank interupt anymore */
	crtc_int_mask = RR32(regs_crtc_int_mask[i]);
	crtc_int_mask &= ~IM_VBLANK_INT_MASK;
	WR32(crtc_int_mask, regs_crtc_int_mask[i]);

	/*
	 * NOTE(review): notify is called without a NULL check; this relies on
	 * the vblank interrupt being unmasked only while a page flip is
	 * registered (see dce6_pf/dce6_pf_cancel_all) -- TODO confirm no
	 * other path unmasks it
	 */
	spin_lock(&dce->dps[i].pf.lock);
	dce->dps[i].pf.notify(i, vblank_n, monotonic_raw_tp,
							dce->dps[i].pf.data);
	dce->dps[i].pf.notify = NULL;
	dce->dps[i].pf.data = NULL;
	spin_unlock(&dce->dps[i].pf.lock);
}
EXPORT_SYMBOL_GPL(dce6_pf_irq);

/*
 * Request a page flip on crtc i. notify will be called from hard irq
 * context at the next vblank, with data passed back to it.
 * Returns 0 on success, SI_DCE_PF_PENDING if a flip is already waiting
 * on this crtc, -DCE6_ERR if arming the flip in hardware failed.
 */
long dce6_pf(struct dce6 *dce, u8 i,
	void (*notify)(u8 i, u32 vblanks_n, struct timespec monotonic_raw_tp,
						void *data), void *data)
{
	long r;
	u32 crtc_int_mask;

	r = 0;

	lock(dce);

	/* if we are already waiting for a pf, notify the user */
	spin_lock_irq(&dce->dps[i].pf.lock);
	if (dce->dps[i].pf.notify) {
		spin_unlock_irq(&dce->dps[i].pf.lock);
		r = SI_DCE_PF_PENDING;
		goto unlock_dce;
	}
	spin_unlock_irq(&dce->dps[i].pf.lock);

	r = page_flip(dce, i);
	if (r == -DCE6_ERR)
		goto unlock_dce;

	/*
	 * the notify function will be run in hard irq context: publish the
	 * notify/data pair under the spinlock so the irq handler can never
	 * observe it half-updated
	 */
	spin_lock_irq(&dce->dps[i].pf.lock);
	dce->dps[i].pf.notify = notify;
	dce->dps[i].pf.data = data;
	spin_unlock_irq(&dce->dps[i].pf.lock);

	/* a page flip happens in a vblank: unmask the vblank interrupt */
	crtc_int_mask = RR32(regs_crtc_int_mask[i]);
	crtc_int_mask |= IM_VBLANK_INT_MASK;
	WR32(crtc_int_mask, regs_crtc_int_mask[i]);

unlock_dce:
	unlock(dce);
	return r;
}
EXPORT_SYMBOL_GPL(dce6_pf);

/*
 * The mutex protects against client concurrent accesses, the spinlock against
 * irq handlers concurrent accesses.
 */
/*
 * Cancel every pending page flip: on each display pipe in use, mask the
 * vblank interrupt then drop the registered notify callback. The
 * callback of a cancelled flip is never called.
 */
void dce6_pf_cancel_all(struct dce6 *dce)
{
	u8 i;

	lock(dce);
	for (i = 0; i < dce->ddev.crtcs_n; ++i) {
		u32 crtc_int_mask;

		/* skip display pipes not in use */
		if ((dce->dps_used & BIT(i)) == 0)
			continue;

		/* shut down the vblank interrupt */
		crtc_int_mask = RR32(regs_crtc_int_mask[i]);
		crtc_int_mask &= ~IM_VBLANK_INT_MASK;
		WR32(crtc_int_mask, regs_crtc_int_mask[i]);

		/* drop the pending notification, if any */
		spin_lock_irq(&dce->dps[i].pf.lock);
		dce->dps[i].pf.notify = NULL;
		dce->dps[i].pf.data = NULL;
		spin_unlock_irq(&dce->dps[i].pf.lock);
	}
	unlock(dce);
}
EXPORT_SYMBOL_GPL(dce6_pf_cancel_all);

/* reset the atombios dp state of every crtc */
void crtcs_atb_states_init(struct dce6 *dce)
{
	u8 crtc;

	for (crtc = 0; crtc < dce->ddev.crtcs_n; ++crtc)
		atb_dp_state(dce->ddev.atb, crtc, 0);
}
